/**THIS FILE SETS UP DATA FOR REPLICATION OF “The Effects of Education on Financial Outcomes: Evidence from Kenya”**/
 
* Start from an empty session
clear all

* Point Stata at your local replication directory here
cd ""

* Folder globals, relative to the working directory:
*   dr   = raw input data, d0 = derived output data, dtab = tables and figures
global dr "input_data/"
global d0 "output_data/"
global dtab "tables_figures/"


*log using "$dl/fpe_intensity.log", replace
tempfile temp temp1 

********************************************************************************
****************** Step 1: Get Intensity Measure from 1999 Census **************
********************************************************************************

use "$dr/KE census1999.dta", clear

******* 1.1 Generate intensity measures ***********

* Throughout this section sex == 2 feeds the _female variables and sex == 1 the _male ones.

* Indicator for never attending primary (edattend codes 96 or 97); code 99 is left missing *
gen double never_attend        = inlist(ke1999a_edattend, 96, 97) if ke1999a_edattend != 99
gen double never_attend_female = inlist(ke1999a_edattend, 96, 97) if ke1999a_edattend != 99 & sex == 2
gen double never_attend_male   = inlist(ke1999a_edattend, 96, 97) if ke1999a_edattend != 99 & sex == 1

* Indicator for not completing primary (edattain == 1); edattain == 0 is left missing *
gen double no_primary        = (edattain == 1) if edattain != 0
gen double no_primary_female = (edattain == 1) if edattain != 0 & sex == 2
gen double no_primary_male   = (edattain == 1) if edattain != 0 & sex == 1

* Years of primary completed *
*   codes 96/97      -> 0 years
*   codes 0 to 9     -> the code itself
*   codes above 9, up to 18 -> capped at 8 years
*   anything else    -> missing
* NOTE(review): code 9 maps to 9 years even though the cap is 8 -- confirm this is intended.
gen double yrs_primary = cond(inlist(ke1999a_edattend, 96, 97), 0, ///
	cond(ke1999a_edattend >= 0 & ke1999a_edattend <= 9, ke1999a_edattend, ///
	cond(ke1999a_edattend > 8 & ke1999a_edattend <= 18, 8, .)))

gen double yrs_primary_female = yrs_primary if ke1999a_edattend != 99 & sex == 2
gen double yrs_primary_male   = yrs_primary if ke1999a_edattend != 99 & sex == 1

* Indicator for being unemployed, defined only when empstat is 1 or 2 *
gen unemployed = (empstat == 2) if inlist(empstat, 1, 2)

******* 1.2 Match counties with subregions ***********

* Province of birth = birthplace code divided by 100, rounded down (only for bplke <= 899)
gen bpl_province = floor(bplke/100) if bplke<=899
label define provincelabel 1 "Nairobi" 2 "Central" 3 "Coast" 4 "Eastern" 5 "N.Eastern" ///
6 "Nyanza" 7 "RiftValley" 8 "Western"
label values bpl_province provincelabel

* District to county crosswalk, using county numbers from the FinAccess dataset.
* Each birthplace code appears in exactly one rule, so the rules are order-independent.
gen county = 101 if bplke == 110					// Nairobi

replace county = 201 if bplke == 220				// NYANDARUA
replace county = 202 if bplke == 221				// NYERI
replace county = 203 if bplke == 205				// KIRINYAGA
replace county = 204 if inlist(bplke, 222, 224)		// MURANG'A
replace county = 205 if inlist(bplke, 208, 210, 223)	// KIAMBU

replace county = 301 if bplke == 301				// Mombasa
replace county = 302 if bplke == 303				// Kwale
replace county = 303 if inlist(bplke, 306, 308)		// KILIFI
replace county = 304 if bplke == 309				// TANA RIVER
replace county = 305 if bplke == 311				// LAMU
replace county = 306 if bplke == 320				// TAITA-TAVETA

replace county = 401 if inlist(bplke, 401, 404)		// MARSABIT
replace county = 402 if bplke == 405				// ISIOLO
replace county = 403 if inlist(bplke, 410, 431, 432, 433)	// MERU
replace county = 404 if bplke == 414				// THARAKANITHI
replace county = 405 if inlist(bplke, 415, 416)		// EMBU
replace county = 406 if inlist(bplke, 419, 430)		// KITUI
replace county = 407 if bplke == 421				// MACHAKOS
replace county = 408 if bplke == 425				// MAKUENI

replace county = 501 if bplke == 501				// Garissa
replace county = 502 if bplke == 520				// Wajir
replace county = 503 if bplke == 521				// Mandera

replace county = 601 if inlist(bplke, 601, 602)		// SIAYA
replace county = 602 if inlist(bplke, 630, 606)		// KISUMU
replace county = 603 if inlist(bplke, 610, 631)		// MIGORI
replace county = 604 if inlist(bplke, 607, 608, 609)	// HOMABAY
replace county = 605 if inlist(bplke, 614, 615, 632)	// KISII
replace county = 606 if bplke == 633				// NYAMIRA

replace county = 701 if bplke == 750				// TURKANA
replace county = 702 if bplke == 704				// WEST POKOT
replace county = 703 if bplke == 751				// SAMBURU
replace county = 704 if bplke == 752				// TRANSNZOIA
replace county = 705 if inlist(bplke, 713, 716)		// BARINGO
replace county = 706 if bplke == 758				// UASIN GISHU
replace county = 707 if inlist(bplke, 757, 720, 721)	// ELEGEYO MARAKWET
replace county = 708 if bplke == 753				// NANDI
replace county = 709 if bplke == 754				// LAIKIPIA
replace county = 710 if bplke == 730				// NAKURU
replace county = 711 if inlist(bplke, 755, 736)		// NAROK
replace county = 712 if inlist(bplke, 756, 744)		// KAJIADO
replace county = 713 if inlist(bplke, 739, 741)		// KERICHO
replace county = 714 if bplke == 743				// BOMET

replace county = 801 if inlist(bplke, 805, 830, 831)	// KAKAMEGA
replace county = 802 if bplke == 806				// VIHIGA
replace county = 803 if inlist(bplke, 832, 815)		// BUNGOMA
replace county = 804 if inlist(bplke, 816, 833)		// BUSIA

* Sanity check: only foreign-born or unknown birthplaces should remain unmapped
tab bplke if mi(county)
* Drop those with unknown birthplace and those foreign born
* (a missing bplke also satisfies >= 900 and is dropped)
drop if bplke >= 900

*** Attach FinAccess sub-regions to the census records ***
gen a2 = county

* Build the county -> sub-region lookup from the FinAccess file without
* disturbing the census data currently in memory
preserve 
	use "$dr/FinAccess_2016_data_final_anonymized.dta", clear
	keep a2 sub_region
	rename a2 county
	duplicates drop
	tempfile county_subregion_cross
	save `county_subregion_cross'
	
	* Second copy of the lookup with _reside suffixes on both variables
	rename sub_region sub_region_reside
	rename county county_reside
	tempfile county_subregion_cross_reside
	save `county_subregion_cross_reside'
	
	
restore

* Every census record must find its county in the lookup (and vice versa), else stop
merge m:1 county using `county_subregion_cross', assert(match) nogenerate

* Collapse down to obtain the weighted average of:
* (1) Never attended primary
* (2) Did not complete primary school
* (3) Years of primary school completed
* All within each sub-region, overall and separately by gender

* Restrict to ages 15 through 25 *
keep if inrange(age, 15, 25)

* Variables averaged in both collapses below
local intensity_means no_primary no_primary_female no_primary_male never_attend ///
	never_attend_female never_attend_male yrs_primary yrs_primary_female yrs_primary_male

* Sub-region level averages *
preserve
collapse (mean) `intensity_means' unemployment_rate = unemployed [pw = perwt], by(sub_region)

* Flag sub-regions above the cross-sub-region median share without primary
egen fpe_med		= median(no_primary)
gen  fpe_abovemed	= no_primary > fpe_med

foreach g in female male {
	egen fpe_med_`g'		= median(no_primary_`g')
	gen  fpe_abovemed_`g'	= no_primary_`g' > fpe_med_`g'
}

save "$d0/fpe_intensity.dta", replace

restore

* Province-of-birth level averages *
collapse (mean) `intensity_means' unemployment_rate = unemployed [pw = perwt], by(bpl_province)

save "$d0/fpe_intensity_province.dta", replace

********************************************************************************
********************* Step 2: Get 2009 Unemployment Rate *********************
********************************************************************************

use "$dr/KE census2009.dta", clear

* Province code, and a 3-digit district code built from characters 1, 3 and 4
* of the second-level geography code
rename geo1_ke2009 provke
tostring geo2_ke2009, gen(dist_string)
gen distke = substr(dist_string, 1, 1) + substr(dist_string, 3, 2)
destring distke, replace

* Unemployed indicator, defined only when empstat is 1 or 2
gen unemployed = (empstat == 2) if inlist(empstat, 1, 2)

* Assign sub-regions: whole provinces where possible, district lists otherwise
gen sub_region = .

* Nairobi *
replace sub_region = 1 if provke == 1
* Central *
replace sub_region = 2 if provke == 2
* Mombasa *
replace sub_region = 3 if distke == 301
* Coastal (rest of province 3)
replace sub_region = 4 if provke == 3 & distke != 301
* Upper Eastern - Marsabit, Isiolo
replace sub_region = 5 if inlist(distke, 401, 402, 403, 404, 405, 406)
* Mid-Eastern region * Meru, Tharaka Nithi, Embu.
replace sub_region = 6 if inlist(distke, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, ///
	431, 432, 433, 434)
* Lower Eastern region * Kitui, Machakos, Makueni.
replace sub_region = 7 if inlist(distke, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, ///
	427, 428, 430)
* North Eastern region *
replace sub_region = 8 if provke == 5
* Nyanza region * 
replace sub_region = 9 if provke == 6
* North Rift region * Turkana, Samburu and West Pokot.
replace sub_region = 10 if inlist(distke, 701, 702, 703, 704, 705, 706, 707, 708, 709, 750, 751)
* Central Rift region * TransNzoia, Baringo, Uasin Gishu, Elgeyo Marakwet, Laikipia, Nakuru and Nandi.
replace sub_region = 11 if inlist(distke, 710, 711, 712, 713, 714, 715, 716, 717, 718, 719, ///
	720, 721, 722, 723, 724, 725, 726, 727, 728, 729, 730, 731, 732, 733, ///
	752, 753, 754, 757, 758)
* South Rift region * Kajiado, Narok, Bomet, Kericho.
replace sub_region = 12 if inlist(distke, 734, 735, 736, 737, 738, 739, 740, 741, 742, 743, 744, ///
	755, 756)
* Western region * 
replace sub_region = 13 if provke == 8

* Weighted unemployment rate by sub-region
collapse (mean) unemployment_2009 = unemployed [pw = perwt], by(sub_region)

compress
save "$d0/subregion_unemployment_2009.dta", replace

********************************************************************************
**** Step 3: Merge Intensity with 2015 FinAccess and Prepare for Analysis	****
********************************************************************************


** Start from the FinAccess survey file
use "$dr/FinAccess_2016_data_final_anonymized.dta", clear

* Attach the 1999 sub-region intensity measures (15-25 year olds) by sub-region
merge m:1 sub_region using "$d0/fpe_intensity.dta", nogenerate

label variable unemployment_rate "Unemployment rate (1999)"

* Years of primary still to be gained: 8 minus the sub-region average years of primary
gen double potential_yrs_prim = 8 - yrs_primary
label variable potential_yrs_prim "Potential years of primary"

********************************************************************************
**** 				Step 4: Create variables of interest for Analysis		****
********************************************************************************


* Education attainment indicators built from a_14 (highest level completed).
* The un-suffixed variables are cumulative (level k or above); the _only
* variables flag exactly level k.
*
* Every indicator is guarded with !mi(a_14). Stata treats a missing value as
* larger than any number, so an unguarded a_14>=k would code a respondent with
* missing education as having attained every level. The guard leaves those
* respondents missing, matching completed_postsecondary and educ_yrs.

* Get our first stage outcome variable of interest
gen no_education = a_14 == 1 if !mi(a_14)
lab var no_education "No formal education"

gen some_primary = a_14>=2 if !mi(a_14)
lab var some_primary "Some primary"

gen some_primary_only = a_14==2 if !mi(a_14)
lab var some_primary_only "Some primary"

gen completed_primary = a_14>=3 if !mi(a_14)
lab var completed_primary "Completed primary"

gen completed_primary_only = a_14==3 if !mi(a_14)
lab var completed_primary_only "Completed primary"

gen some_secondary = a_14>=4 if !mi(a_14)
lab var some_secondary "Some secondary"

gen some_secondary_only = a_14==4 if !mi(a_14)
lab var some_secondary_only "Some secondary"

gen completed_secondary = a_14>=5 if !mi(a_14)
lab var completed_secondary "Completed secondary"

gen completed_secondary_only = a_14==5 if !mi(a_14)
lab var completed_secondary_only "Completed secondary"

gen some_postsecondary = a_14>=6 if !mi(a_14)
lab var some_postsecondary "Some postsecondary"

gen completed_postsecondary = a_14 == 7 if !mi(a_14)
lab var completed_postsecondary "Completed postsecondary"

* Convert education levels to years (levels 1-7 -> 0, 4, 8, 10, 12, 14, 16); *
* any other value of a_14 stays missing *
gen educ_yrs = 0 if a_14 == 1
replace educ_yrs = 4 if a_14 == 2
replace educ_yrs = 8 if a_14 == 3
replace educ_yrs = 10 if a_14 == 4
replace educ_yrs = 12 if a_14 == 5
replace educ_yrs = 14 if a_14 == 6
replace educ_yrs = 16 if a_14 == 7
lab var educ_yrs "Years of education"

* Create education variable top coded at completing secondary/12 years *
gen educ_lvl_censored = a_14
replace educ_lvl_censored = 5 if educ_lvl_censored>=5 & !mi(educ_lvl_censored)
lab var educ_lvl_censored "Education level (censored)"

gen educ_yrs_censored = educ_yrs
replace educ_yrs_censored = 12 if educ_yrs_censored>=12 & !mi(educ_yrs_censored)
lab var educ_yrs_censored "Years of education (censored)"

* Treatment indicator: 1 for ages 16-18, 0 for other ages up to 30, missing above 30.
* Our control group are those 28-30 (selected through the sample indicators).
gen FPE = inrange(age, 16, 18) if age <= 30

* Key Diff-in-Diff interaction: treatment cohort times sub-region intensity
gen Intensity = never_attend
gen FPExintensity = FPE * Intensity
label variable FPExintensity "FPE $\times$ Intensity"

label variable no_primary "Intensity"
label variable no_primary_female "Intensity (Female)"
label variable no_primary_male "Intensity (Male)"

label variable never_attend "Intensity"
label variable never_attend_female "Intensity (Female)"
label variable never_attend_male "Intensity (Male)"

* Demographic variables *
gen female = (gender_of_respondent == 2)
label variable female "Female"
label variable age "Age"

* Marital status from a_9; code 5 is left missing
gen married = (a_9 == 4) if a_9 != 5
label variable married "Currently married"
gen ever_married = inlist(a_9, 2, 3, 4) if a_9 != 5
label variable ever_married "Ever married"

* Single-year age dummies (16 through 30) and their interactions with intensity
forvalues a = 16/30 {
	gen age_`a' = (age == `a')
	label variable age_`a' "Age = `a'"
	gen IntensityXage_`a' = age_`a' * Intensity
	label variable IntensityXage_`a' "Age = `a' $\times$ Intensity"
}	
	
	
*** Generate key outcome variables *****

* Effective Numeracy *
* Two questions (c1, c2). Response code 1 feeds the Correct indicator, 2 the
* Incorrect indicator and 3 the Don't Know indicator.

* Answered correctly (among all non-missing responses)
gen effective_numeracy_1 = (c1 == 1) if !missing(c1)
label variable effective_numeracy_1 "C1. Correct: A group of 5 win KSh 100,000. How much each?"

gen effective_numeracy_2 = (c2 == 1) if !missing(c2)
label variable effective_numeracy_2 "C2. Correct: KSh 10,000 loan, 10\% interest/year. Interest at end of year?"

* Answered incorrectly
gen effective_numeracy_1_w = (c1 == 2) if !missing(c1)
label variable effective_numeracy_1_w "C1. Incorrect: A group of 5 win KSh 100,000. How much each?"

gen effective_numeracy_2_w = (c2 == 2) if !missing(c2)
label variable effective_numeracy_2_w "C2. Incorrect: KSh 10,000 loan, 10\% interest/year. Interest at end of year?"

* Answered "don't know"
gen effective_numeracy_1_dk = (c1 == 3) if !missing(c1)
label variable effective_numeracy_1_dk "C1. Don't Know: A group of 5 win KSh 100,000. How much each?"

gen effective_numeracy_2_dk = (c2 == 3) if !missing(c2)
label variable effective_numeracy_2_dk "C2. Don't Know: KSh 10,000 loan, 10\% interest/year. Interest at end of year?"

* Answered correctly, conditional on response codes below 3
gen effective_numeracy_1_k = (c1 == 1) if c1 < 3
label variable effective_numeracy_1_k "C1. Correct if Know: A group of 5 win KSh 100,000. How much each?"

gen effective_numeracy_2_k = (c2 == 1) if c2 < 3
label variable effective_numeracy_2_k "C2. Correct if Know: KSh 10,000 loan, 10\% interest/year. Interest at end of year?"

* Use of financial products *

* Currently banked, excluding and including the mobile-linked accounts
gen has_bank_product = (e4_1a == 1)
note has_bank_product: E4. Currently has a bank product (Excluding Mshwari, KCB Mpesa)
label variable has_bank_product "Currently banked (excl. Mpesa)"

gen has_bank_product2 = (e4_1 == 1)
note has_bank_product2: E4. Currently has a bank product (Including Mshwari, KCB Mpesa)
label variable has_bank_product2 "Currently banked (incl. Mpesa)"

* Difference of the two definitions above
gen has_only_mpesa = has_bank_product2 - has_bank_product
label variable has_only_mpesa "E4. Only bank product is Mshwari or KCB Mpesa"

* Never banked: narrow definition, and a broad one that also requires
* code 3 on e1_8, e1_13 and e1_37
gen never_banked = (e4_2 == 1)
label variable never_banked "E4. Never banked (Excluding Mshwari, KCB Mpesa)"

gen never_banked2 = (e4_2 == 1) & (e1_8 == 3) & (e1_13 == 3) & (e1_37 == 3)
label variable never_banked2 "E4. Never banked (Including Mshwari, KCB Mpesa)"

* Ever banked is the complement of never banked (both are 0/1 with no missings)
gen ever_banked = (never_banked == 0)
note ever_banked: E4. Ever banked (Excluding Mshwari, KCB Mpesa)
label variable ever_banked "Ever banked (excl. Mpesa)"

gen ever_banked2 = (never_banked2 == 0)
note ever_banked2: E4. Ever banked (Including Mshwari, KCB Mpesa)
label variable ever_banked2 "Ever banked (incl. Mpesa)"

gen ever_banked_mpesa_only = ever_banked2 - ever_banked
label variable ever_banked_mpesa_only "Ever banked through Mpesa only"

gen any_savings = (e4_5 == 1) if !missing(e4_5)
label variable any_savings "Any savings (formal or informal)"

* Codes 1 or 2 on the mobile-linked product questions
gen banked_only_mobile = inlist(e1_8, 1, 2) | inlist(e1_13, 1, 2)
label variable banked_only_mobile "Ever banked only through mobile"

gen banked_only_mobile2 = inlist(e1_8, 1, 2) | inlist(e1_13, 1, 2) | inlist(e1_37, 1, 2)
label variable banked_only_mobile2 "Ever banked through Mpesa"

*** Specific Financial Products ***
**"Formal financial product" includes FinAcces definition of "bank product" plus co-ops and MFIs***
* In each pair below: has_ = code 1 on any listed product,
* ever_ = any code other than 3 (a missing response counts as other than 3).
gen has_formal_financial_product = (e4_1 == 1 | e1_1 == 1 | e1_2 == 1)
label variable has_formal_financial_product "Currently has a formal financial product (incl. Mpesa)"
gen ever_formal_financial_product = (ever_banked2 == 1 | e1_1 != 3 | e1_2 != 3)
label variable ever_formal_financial_product "Ever used a formal financial product (incl. Mpesa)"

gen has_formal_savings_product = 0
gen ever_formal_savings_product = 0
foreach acct of varlist e1_8 e1_29 e1_30 e1_31 e1_32 {
	replace has_formal_savings_product = 1 if `acct' == 1
	replace ever_formal_savings_product = 1 if `acct' != 3
}
label variable has_formal_savings_product "Currently has a formal savings product (incl. Mpesa)"
label variable ever_formal_savings_product "Ever used a formal savings product (incl. Mpesa)"

gen has_formal_loan_credit_product = 0
gen ever_formal_loan_credit_product = 0
foreach acct of varlist e1_12 e1_13 e1_15 e1_16 e1_26 e1_27 e1_28 e1_35 {
	replace has_formal_loan_credit_product = 1 if `acct' == 1
	replace ever_formal_loan_credit_product = 1 if `acct' != 3
}
label variable has_formal_loan_credit_product "Currently has a formal loan/credit product (incl. Mpesa)"
label variable ever_formal_loan_credit_product "Ever used a formal loan/credit product (incl. Mpesa)"

gen insurance_product = (e4_9 == 1)
label variable insurance_product "Currently has an insurance product"


** Income variables **

* Refused-to-answer responses (code 1) on the 13 income items become missing *
forvalues src = 1/13 {
	replace d11_`src' = . if d11_`src' == 1
}

* Sum over all income items; missing only when every item is missing
egen all_income = rowtotal(d11_? d11_??), missing

* Earned income: items 1-5, 7-10 and 12 only
egen earned_income = rowtotal(d11_1 d11_2 d11_3 d11_4 d11_5 d11_7 d11_8 d11_9 d11_10 d11_12), missing
* Zero earned income when some income is reported but none of it in the earned items
replace earned_income = 0 if mi(earned_income) & !mi(all_income)
* Zero earned income when the survey total_income is zero
replace earned_income = 0 if mi(earned_income) & total_income == 0
* NOTE(review): this rule keys on monthly_income, not earned_income, so it can
* overwrite a non-missing earned_income -- confirm that is intended.
replace earned_income = 0 if mi(monthly_income) & total_income == 0
label variable earned_income "Monthly income"

* Transformations of earned income
gen log_earned_income1 = log(1 + earned_income)
label variable log_earned_income1 "Log (1 + monthly income)"

* Missing when earned income is zero
gen log_earned_income2 = log(earned_income)
label variable log_earned_income2 "Log (monthly income)"

* Inverse hyperbolic sine transformation
gen IHST_earned_income = log(earned_income + sqrt(earned_income^2 + 1))
label variable IHST_earned_income "IHST of monthly income"

gen earned_any_income = (earned_income > 0) if !missing(earned_income)
label variable earned_any_income "Earned any income"

gen log_income = log(all_income)
label variable log_income "Log monthly income"

label variable all_income "Monthly income"



**** Main Sources of income ****

* One indicator per value of d3_1; everything else (including missing) is "other"
gen farming_most_money = (d3_1 == 1)
gen employed_most_money = (d3_1 == 2)
gen casual_most_money = (d3_1 == 3)
gen self_employ_most_money = (d3_1 == 4)
gen family_friends_most_money = (d3_1 == 6)
gen other_most_money = !inlist(d3_1, 1, 2, 3, 4, 6)

label variable farming_most_money "Farming"
label variable employed_most_money "Employed"
label variable casual_most_money "Casual employment"
label variable self_employ_most_money "Self-employed"
label variable family_friends_most_money "Family/friends/spouse"
label variable other_most_money "Other sources"

* Indicators for different samples *
gen sample1 = 1
label variable sample1 "All ages"
* Ages up to 18, plus ages 28 through 30
gen sample2 = (age <= 18 | inrange(age, 28, 30))
label variable sample2 "Ages 16-18; 28-30"

*** Robustness Sample 1: Exclude Mombasa and Nairobi (sub-regions 3 and 1) ***

gen sample3 = !inlist(sub_region, 1, 3)
label variable sample3 "Excluding Nairobi and Mombasa"

gen sample4 = !inlist(sub_region, 1, 3) & sample2 == 1
label variable sample4 "Excluding Nairobi and Mombasa - 16-18, 28-30"

*** Robustness Sample 2: Exclude those that moved in the past year ***

gen sample5 = (a_12 == 2)
label variable sample5 "Excluding migrants in last 12 months"

gen sample6 = (a_12 == 2) & sample2 == 1
label variable sample6 "Excluding migrants in last 12 months - 16-18, 28-30"

label variable a_14 "A14. What is the highest level of formal education completed?"

label variable numeracy "Numeracy from Section C"
label variable fin_literacy "Financial Literacy from Section B"

gen moved = (a_12 == 1) if !missing(a_12)
label variable moved "Changed residence in last 12 months"

* Flip the raw scale so that a higher number means higher numeracy
gen numeracy2 = 4 - numeracy
tabulate numeracy numeracy2
note numeracy2: = 4-numeracy, since in raw numeracy variable 1= high numeracy
label variable numeracy2 "Effective numeracy"

* Recode the nine b2 items to 1/0 (raw 1 -> 1, raw 2 -> 0, anything else missing).
* The raw item is kept under the _orig suffix and the recoded one takes its name.
forvalues item = 1/9 {
	gen b2_`item'_ = 1 if b2_`item' == 1
	replace b2_`item'_ = 0 if b2_`item' == 2
	rename b2_`item' b2_`item'_orig
	rename b2_`item'_ b2_`item'
}

* Count of recoded items equal to 1 (the single-character wildcard skips the _orig copies)
egen b2 = rowtotal(b2_?)

label variable b2 "How many of 9 financial service terms have you heard of?"

tabulate b2 fin_literacy

*** Religion ***
* Indicators from a_11; code 6 is left missing in all three
gen christian = (a_11 == 1) if a_11 != 6
label variable christian "Christian"
gen muslim = (a_11 == 2) if a_11 != 6
label variable muslim "Muslim"
gen other_religion = inlist(a_11, 3, 4, 5, 7) if a_11 != 6
label variable other_religion "Other religion"

*** Retirement Planning ***
* Start each indicator at 0 (missing when g1_1 == 12), then switch it on when
* any of the seven g1 responses carries one of the relevant codes
gen retire_own_plans = 0 if g1_1 != 12
label variable retire_own_plans "Forward looking retirement"
gen retire_no_plans = 0 if g1_1 != 12
label variable retire_no_plans "No retirement plans"
gen retire_safety_net = 0 if g1_1 != 12
label variable retire_safety_net "Public/private safety net retirement"

forvalues r = 1/7 {
	replace retire_own_plans = 1 if inlist(g1_`r', 1, 2, 6)
	replace retire_no_plans = 1 if inlist(g1_`r', 10, 11)
	replace retire_safety_net = 1 if inlist(g1_`r', 3, 8)
}

* Two additional g1_9_1 codes also count as having no plans
replace retire_no_plans = 1 if g1_9_1 == 995
replace retire_no_plans = 1 if g1_9_1 == 16	//	"God will provide"

*** Informal Savings Group ***
gen informal_savings_group = (h1 != 0)
label variable informal_savings_group "Member of informal savings group"

*** Financial Risk ***
label variable emergency_funds "Able to get money in case of emergency"

gen safe_place_money = (p8_1 == 1 | p8_2 == 1)
label variable safe_place_money "Have a safe place to save money"

* Reverse the 1-3 scale of p9 (1 -> 3, 2 -> 2, 3 -> 1); other values stay missing
gen improved_financial_life = 4 - p9 if inlist(p9, 1, 2, 3)
label variable improved_financial_life "Improved financially over year"

*** Gen has mobile phone ***
gen mobile_phone = (f1 == 1)
label variable mobile_phone "Owns mobile phone"

*** Gen internet usage ***
gen internet_usage = (n2 == 1) if !missing(n2)
label variable internet_usage "Accessed internet in past 4 weeks"

*** Access to financial services ****

* Shared value label for the travel-time questions (code 10 = Don't know)
label define time_fin_provid 1 "Under 10 minutes" 2 "About 10 to 30 minutes" 3 "Over 30 mins to 1 hour" 4 "About 2 hours" ///
5 "About 3 hours" 6 "About 4 hours" 7 "about 5 hours" 8 "About 6 hours" 9 "7 hours or more" 10 "Don't know"

label values q3a_2 q3b_2 q4_2 q5_2 q6_2 q7_2 time_fin_provid

* For each provider type below:
*   time_to_*    = the travel-time category, with "Don't know" (10) set to missing
*   dk_time_to_* = 1 if the time is "Don't know" or the matching _1 question is coded 8

* -- Bank branch --
gen time_to_bank_branch = q3a_2 if q3a_2 != 10
label values time_to_bank_branch time_fin_provid
label variable time_to_bank_branch "If you had to go to the nearest bank branch, how long would it take you to get there"

gen dk_time_to_bank_branch = (q3a_2 == 10) if !missing(q3a_2)
replace dk_time_to_bank_branch = 1 if q3a_1 == 8	// Don't know where nearest is 
label variable dk_time_to_bank_branch "Don't know how long to nearest bank branch"

* -- Mobile money agent --
gen time_to_mpesa = q3b_2 if q3b_2 != 10
label values time_to_mpesa time_fin_provid
label variable time_to_mpesa "If you had to go to the nearest Mobile Money Agent, how long would it take you to get there"

gen dk_time_to_mpesa = (q3b_2 == 10) if !missing(q3b_2)
replace dk_time_to_mpesa = 1 if q3b_1 == 8	// Don't know where nearest is 
label variable dk_time_to_mpesa "Don't know how long to nearest Mobile Money Agent"

* -- Bank agent --
gen time_to_bank_agent = q4_2 if q4_2 != 10
label values time_to_bank_agent time_fin_provid
label variable time_to_bank_agent "If you had to go to the nearest Bank Agent, how long would it take you to get there"

gen dk_time_to_bank_agent = (q4_2 == 10) if !missing(q4_2)
replace dk_time_to_bank_agent = 1 if q4_1 == 8	// Don't know where nearest is 
label variable dk_time_to_bank_agent "Don't know how long to nearest Bank Agent"

* -- Closest financial service provider --
gen time_to_fin_serv_provid = q5_2 if q5_2 != 10
label values time_to_fin_serv_provid time_fin_provid
label variable time_to_fin_serv_provid "If you had to go to the financial service provider closest to you, how long would it take you to get there"

gen dk_time_to_fin_serv_provid = (q5_2 == 10) if !missing(q5_2)
replace dk_time_to_fin_serv_provid = 1 if q5_1 == 8	// Don't know where nearest is 
label variable dk_time_to_fin_serv_provid "Don't know how long to nearest financial service provider"

*** Shocks ***

* Any shock to household finances: p1_1 code 17 gives 0, code 18 and missing stay missing
gen had_shock = p1_1 !=17 if !mi(p1_1) & p1_1!=18
lab var had_shock "Shock to finances of household"

* Coping responses. Up to four responses are recorded in p3_1 through p3_4.
* Each indicator is defined when the first response (p3_1) is non-missing and
* not one of codes 14, 16 or 17, and is then switched on if ANY of the
* remaining responses (p3_2, p3_3, p3_4) carries the relevant code.
local shock_defined "!mi(p3_1) & p3_1!=14 & p3_1!=16 & p3_1!=17"

gen shock_savings = p3_1 == 1 if `shock_defined'
forvalues k = 2/4 {
	replace shock_savings = 1 if p3_`k' == 1
}
lab var shock_savings "Used savings"


gen shock_nothing = p3_1 == 15 if `shock_defined'
forvalues k = 2/4 {
	replace shock_nothing = 1 if p3_`k' == 15
}
lab var shock_nothing "Did nothing"


gen shock_family_church = p3_1 == 5 if `shock_defined'
forvalues k = 2/4 {
	replace shock_family_church = 1 if p3_`k' == 5
}
lab var shock_family_church "Help from family/church/mosque"

gen shock_fundraising = p3_1 == 11 if `shock_defined'
forvalues k = 2/4 {
	replace shock_fundraising = 1 if p3_`k' == 11
}
lab var shock_fundraising "Fundraising"

gen shock_sell_assets = p3_1 == 7 if `shock_defined'
forvalues k = 2/4 {
	replace shock_sell_assets = 1 if p3_`k' == 7
}
lab var shock_sell_assets "Sold assets"


gen shock_borrow = inlist(p3_1, 2, 3, 4) if `shock_defined'
forvalues k = 2/4 {
	replace shock_borrow = 1 if inlist(p3_`k', 2, 3, 4)
}
lab var shock_borrow "Borrowed from bank, moneylender, etc."

* Residual category: codes 6, 8, 9, 10, 12 and 13.
* FIX: the follow-up responses are now p3_2, p3_3 and p3_4, like every other
* indicator above. Previously p3_1 was re-tested (a no-op) and p3_4 was never
* examined, so an "other" response recorded fourth was silently ignored.
gen shock_other = inlist(p3_1, 6, 8, 9, 10, 12, 13) if `shock_defined'
forvalues k = 2/4 {
	replace shock_other = 1 if inlist(p3_`k', 6, 8, 9, 10, 12, 13)
}
lab var shock_other "Other"

********************************************************************************
************************** Create Summary Indices  *****************************
********************************************************************************

* This section creates summary indices similar to Kling, Liebman, and Katz (2007)
** The Kling indices standardize each component using the control group mean and SD **
** Our ``control" group will be those 28-30 in below median intensity sub-regions **

* Weighted median of the intensity measure defines the below-median sub-regions
summarize never_attend [aweight = popwgt_raw], detail

gen ctrl = never_attend <= `r(p50)' & inrange(age, 28, 30)


*********************** Financial Inclusion Index ******************************

* Standardize each component against the control group, then average the z-scores
local k = 0
foreach component of varlist ever_banked2 has_bank_product2 ever_formal_savings_product {

	local ++k

	summarize `component' [aweight = popwgt_raw] if ctrl == 1
	gen fin_incl_index_`k' = (`component' - r(mean)) / r(sd)

}


egen fin_incl_kling_index = rowmean(fin_incl_index_*)

drop fin_incl_index_*


************************* Financial Capability Index ***************************
* improved_financial_life is deliberately left out of the component list
local k = 0
foreach component of varlist b2 numeracy2 retire_own_plans informal_savings_group emergency_funds ///
safe_place_money any_savings {

	local ++k

	summarize `component' [aweight = popwgt_raw] if ctrl == 1
	gen fin_capib_index_`k' = (`component' - r(mean)) / r(sd)

}


egen fin_capib_kling_index = rowmean(fin_capib_index_*)

drop fin_capib_index_*


************************ Economic Self-Sufficiency Index ***********************

* Indicator for NOT relying on friends/family as the primary source of money,
* so that a larger value points towards economic self-sufficiency
gen income_independence = 1 - family_friends_most_money

local k = 0
foreach component of varlist earned_any_income earned_income income_independence {

	local ++k

	summarize `component' [aweight = popwgt_raw] if ctrl == 1
	gen econ_self_suff_index_`k' = (`component' - r(mean)) / r(sd)

}

egen econ_self_suff_kling_index = rowmean(econ_self_suff_index_*)
drop econ_self_suff_index_*

label variable fin_incl_kling_index "Formal Financial Inclusion"
label variable fin_capib_kling_index "Financial Capability"
label variable econ_self_suff_kling_index "Economic Self-Sufficiency"


compress

* Save the basefile for our analysis *
label data "Created in 0 fpe_intensity.do"
save "$d0/fpe_FinAccess_basefile.dta", replace 


********************************************************************************
**** Step 5: calculate changes in distance to banking products over time	****
********************************************************************************

* Paths use forward slashes, which Stata accepts on every platform; the
* previous backslash separators only resolved on Windows.
insheet using "$dr/FinAccess_Retail_2009_public.tab", tab clear

* Share within 10 / 30 / 60 minutes, defined only for time codes below 10
gen under_10 = b1_2 == 1 if b1_2<10
gen under_30 = b1_2 <=2 if b1_2<10
gen under_60 = b1_2 <=3 if b1_2<10

rename a1 province
rename b1_2 min_dist

* Weighted province means.
* NOTE(review): min_dist still contains codes of 10 and above here, whereas the
* 2015 counterpart sets code 10 to missing before averaging -- confirm intended.
collapse (mean) under_10 under_30 under_60 min_dist [aw=indv_wgt], by(province)
gen year = 2009

save "$d0/mean_access_2009.dta", replace

********************************************************************************
**** 	Step 6: Calculate access to financial services from 2015 survey		****
********************************************************************************

* Paths in this step use forward slashes, which Stata accepts on every
* platform; the previous backslash separators only resolved on Windows.

use "$dr/FinAccess_2016_data_final_anonymized.dta", clear

* Smallest travel-time category across the four provider types
egen min_dist = rowmin(q3a_2 q3b_2 q4_2 q5_2)

gen under_10 = min_dist==1 if min_dist<10
gen under_30 = min_dist<=2 if min_dist<10
gen under_60 = min_dist<=3 if min_dist<10

* Set don't know to all to missing *
replace min_dist = . if min_dist == 10

count if min_dist == q3b_2 & !mi(min_dist) & !mi(q3b_2)	// 7478
count if min_dist < q3b_2 & !mi(min_dist) & !mi(q3b_2) // 916
* So, most of the time the closest mpesa agent is AT LEAST as close as the closest banking product.

collapse (mean) under_10 under_30 under_60 min_dist [aw=popwgt_raw], by(province)
gen year = 2015

*** Calculate changes in distance to closest financial services over time ***

append using "$d0/mean_access_2009.dta"

* One row per province with 2009 and 2015 columns side by side
reshape wide under_10 under_30 under_60 min_dist, i(province) j(year)

* Positive values mean access improved between 2009 and 2015
gen intensity_under10 = under_102015-under_102009
gen intensity_under30 = under_302015-under_302009
gen intensity_under60 = under_602015-under_602009
gen intensity_ordinal = min_dist2009-min_dist2015

lab var intensity_under10 "Province level change in closest banking product under 10 minutes 09-15"
lab var intensity_under30 "Province level change in closest banking product under 30 minutes 09-15"
lab var intensity_under60 "Province level change in closest banking product under 60 minutes 09-15"
lab var intensity_ordinal "Province level ordinal change in closest banking product 09-15"

list province intensity*

corr intensity*

keep province intensity* min_dist2009 min_dist2015

save "$d0/2009_2015_access_change.dta", replace

****** Include these as an additional control interacted with FPE *****

use "$d0/fpe_FinAccess_basefile.dta", clear

* NOTE(review): the merged data is not saved here, and the next step loads a
* different file with the clear option, so this merge currently has no lasting
* effect on the basefile. Confirm whether a save was intended.
merge m:1 province using "$d0/2009_2015_access_change.dta", nogen


********************************************************************************
**** 			Step 7: Calculate 2014 DHS attendance by age				****
********************************************************************************

* Builds province-by-age mean school attendance from the DHS household file
* and merges it onto the FinAccess basefile.
*
* The path uses "/" as the separator: Stata accepts it on every platform,
* whereas "\" only resolves on Windows.
use "$dr/KEHR70FL.dta", clear

* Drop the leading zero from the 01-09 member suffixes so that every member
* column has a plain numeric suffix for the reshape below
forv i = 1(1)9 {
 rename sh18_0`i' sh18_`i'
 rename hv104_0`i' hv104_`i'
 rename hv105_0`i' hv105_`i'
} 

desc sh18_1 hv105_1 hv104_1

keep sh18_* hv105_* hv104_* hhid hv005 hv024

* One row per household member slot
reshape long sh18_ hv105_ hv104_, i(hhid hv005 hv024) j(memid)

lab var sh18_ "School attendance"
lab var hv105_ "Age"
lab var hv104_ "Gender"

* The DHS is only representative at the province level, not the subregion

* Age-specific attendance indicators. These are not named in the collapse
* below, so they do not reach the saved file.
forv x = 12(1)24 {
	gen attend_`x' = sh18_ == 1 if hv105_ == `x'
}

* Attendance indicator; a missing sh18_ is coded 0, not missing
gen attend = sh18_ == 1

gen attend_15_17 = sh18_ == 1 if hv105_ >=15 & hv105_<=17
gen attend_16_18 = sh18_ == 1 if hv105_ >=16 & hv105_<=18

* Province name from the hv024 value label, in proper case to match the
* strings compared against below
decode hv024, gen(province)
replace province = proper(province)

gen age = hv105_

* Everyone older than 24 is treated as not attending. Missing ages also
* satisfy age>24 in Stata.
* NOTE(review): rows with missing age are collapsed into an age == . cell
* per province; confirm that this is harmless for the merge below.
replace attend = 0 if age>24
collapse (mean) dhs_2014_attend=attend [aw=hv005], by(province age)

* Convert province names to the numeric codes used as the merge key; a name
* not listed here leaves province missing
rename province province_2
gen province = 1 if province_2 == "Nairobi"
replace province = 2 if province_2 == "Central"
replace province = 3 if province_2 == "Coast"
replace province = 4 if province_2 == "Eastern"
replace province = 5 if province_2 == "North Eastern"
replace province = 6 if province_2 == "Nyanza"
replace province = 7 if province_2 == "Rift Valley"
replace province = 8 if province_2 == "Western"
drop province_2
lab data "dhs_attendance_2014.do"
save "$d0/dhs_attendance_age.dta", replace

**** Merge on 2014 DHS school attendance by age ***
* keep(1 3) retains every basefile observation and drops province-age cells
* that match no respondent
use "$d0/fpe_FinAccess_basefile.dta", clear
merge m:1 province age using "$d0/dhs_attendance_age.dta", keep(1 3) nogen
compress
save "$d0/fpe_FinAccess_basefile.dta", replace


********************************************************************************
**** 	Step 8: Clean up 2006 FinAccess data file to use for falsification	****
********************************************************************************



use "$dr/FinAccess_Retail_2006_public.dta", clear

* This dataset is representative at the province level *

* Attach the province-level FPE intensity measures, keyed on province
gen bpl_province = province
gen urban = cluster_type
merge m:1 bpl_province using "$d0/fpe_intensity_province.dta", assert(2 3) keep(3) nogen
drop bpl_province

gen female = (gender == 2)

* Single-year age dummies for the two cohorts used in the falsification test
foreach a of numlist 19/21 31/33 {
	gen age_`a' = (age == `a')
}

* Cumulative attainment dummies: education codes 2, 3, 4 and 5 are the
* thresholds, and each dummy is defined only for codes up to 7
local cutoff = 2
foreach v in some_primary completed_primary some_secondary completed_secondary {
	gen `v' = (education >= `cutoff') if education <= 7
	local ++cutoff
}
lab var some_primary "Some Primary"
lab var completed_primary "Completed Primary"
lab var some_secondary "Some Secondary"
lab var completed_secondary "Completed Secondary"

replace education = . if education == 8 // Should only be 1-7
lab var education "Highest Education Level"

* Banking status from a4
gen currently_banked = (a4 == 1)
gen ever_banked = inlist(a4, 1, 2)
lab var currently_banked "Currently banked"
lab var ever_banked "Ever banked"

* Main income source (b2) grouped into six categories; each dummy is left
* missing when b2 is 99 or missing
local b2_valid "b2 != 99 & !mi(b2)"
gen farming_most_money        = inlist(b2, 3, 4, 5, 6, 7)      if `b2_valid'
gen employed_most_money       = inlist(b2, 8, 11, 12, 13, 14)  if `b2_valid'
gen casual_most_money         = inlist(b2, 9, 10)              if `b2_valid'
gen self_employ_most_money    = inlist(b2, 15, 16, 17)         if `b2_valid'
gen family_friends_most_money = (b2 == 2)                      if `b2_valid'
gen other_most_money          = inlist(b2, 18, 19, 20, 21)     if `b2_valid'

lab var farming_most_money "Farming"
lab var employed_most_money "Employed"
lab var casual_most_money "Casual Employment"
lab var self_employ_most_money "Self-Employed"
lab var family_friends_most_money "Family/Friends/Spouse"
lab var other_most_money "Other Sources"

* Falsification sample: ages 19-21 (placebo "treated") and 31-33 (comparison)
gen sample2 = inrange(age, 19, 21) | inrange(age, 31, 33)

gen FPE_falsification = inrange(age, 19, 21) if age <= 33
lab var FPE_falsification "FPE Falsification"

* This is our key Diff-in-Diff interaction
gen Intensity = never_attend
gen FPExintensity = FPE_falsification * Intensity
lab var FPExintensity "FPE $\times$ Intensity"

compress

save "$d0/fpe_FinAccess2006_basefile.dta", replace


********************************************************************************
**** 		Step 9: Calculates poverty rates at the sub-region level		****
********************************************************************************

********************************************************************************
* Below calculates poverty rates at the sub-region level from the 1997 LSMS
* survey. It uses the same methodology that Lucas & Mbiti (2012) used in their
* publicly available do files 
********************************************************************************

use "$dr/finchild.dta", clear

* Crosswalk to merge county in FinAccess files to district in LSMS files *
* The list holds (LSMS district code, FinAccess county code) pairs; the text
* after each /// is a comment naming the county.
local district_to_county ///
	10 101 /// Nairobi
	24 201 /// NYANDARUA
	25 202 /// NYERI
	22 203 /// KIRINYAGA
	23 204 /// MURANG'A
	21 205 /// KIAMBU
	34 301 /// Mombasa
	32 302 /// Kwale
	31 303 /// KILIFI
	36 304 /// TANA RIVER
	33 305 /// LAMU
	35 306 /// TAITA-TAVETA
	45 401 /// MARSABIT
	42 402 /// ISIOLO
	46 403 /// MERU
	49 403 /// MERU (Nyambene)
	48 404 /// THARAKANITHI
	40 405 /// EMBU
	41 405 /// EMBU (Mberee)
	43 406 /// KITUI
	44 407 /// MACHAKOS
	47 408 /// MAKUENI
	51 501 /// Garissa
	53 502 /// Wajir
	52 503 /// Mandera
	63 601 /// SIAYA
	62 602 /// KISUMU
	65 603 /// MIGORI
	64 604 /// HOMABAY
	61 605 /// KISII
	66 606 /// NYAMIRA
	85 701 /// TURKANA
	87 702 /// WEST POKOT
	83 703 /// SAMBURU
	84 704 /// TRANSNZOIA
	81 705 /// BARINGO
	86 706 /// UASIN GISHU
	82 707 /// ELEGEYO MARAKWET
	75 708 /// NANDI
	73 709 /// LAIKIPIA
	74 710 /// NAKURU
	76 711 /// NAROK
	78 711 /// NAROK (Transmara)
	71 712 /// KAJIADO
	72 713 /// KERICHO
	77 714 /// BOMET
	93 801 /// KAKAMEGA
	94 802 /// VIHIGA
	91 803 /// BUNGOMA
	92 804 // BUSIA

* Walk the list two tokens at a time; districts not listed keep county missing
gen county = .
local n_tokens : word count `district_to_county'
forvalues k = 1(2)`n_tokens' {
	local next = `k' + 1
	local dist_code   : word `k' of `district_to_county'
	local county_code : word `next' of `district_to_county'
	replace county = `county_code' if district == `dist_code'
}

* Get county to sub-region crosswalk from FinAccess file
tempfile county_subregion_cross
preserve
	use "$dr/FinAccess_2016_data_final_anonymized.dta", clear
	rename a2 county
	keep county sub_region
	duplicates drop
	save `county_subregion_cross'
restore

* Only observations whose county appears in the crosswalk are kept
merge m:1 county using `county_subregion_cross', keep(3) nogen

* This is how LM AEJ 2012 calculate the poverty rate they control for in their do files*
drop if quitile == .
gen poverty_rate = (poverty == 2)
collapse (mean) poverty_rate [weight=adjweigh], by(sub_region)

lab var poverty_rate "Poverty rate (1997)"

lab data "calc_lsms_poverty_rates.do"
save "$d0/lsms_97_poverty_rates.dta", replace 


********************************************************************************
**** 		Step 10: Calculates Free Secondary Education Intensity			****
********************************************************************************

**** Calculates primary-to-secondary transition rates using the 2009 census ****

use "$dr/KE census2009.dta", clear


* District to county crosswalk, using county numbers from the FinAccess dataset
* (bplke = district of birth; each county is assigned from its list of codes)
gen county = 101 if inrange(bplke, 101, 110)	// Nairobi

* Central
replace county = 201 if inlist(bplke, 201, 202)	// NYANDARUA
replace county = 202 if inlist(bplke, 203, 204)	// NYERI
replace county = 203 if inlist(bplke, 205)	// KIRINYAGA
replace county = 204 if inlist(bplke, 206, 207, 216)	// MURANG'A
replace county = 205 if inlist(bplke, 208, 209, 210, 211, 212, 213, 214, 215, 217)	// KIAMBU

* Coast
replace county = 301 if inlist(bplke, 301, 302)	// Mombasa
replace county = 302 if inlist(bplke, 303, 304, 305)	// Kwale
replace county = 303 if inlist(bplke, 306, 307, 308)	// KILIFI
replace county = 304 if inlist(bplke, 309, 310)	// TANA RIVER
replace county = 305 if inlist(bplke, 311)	// LAMU
replace county = 306 if inlist(bplke, 312, 313)	// TAITA-TAVETA

* Eastern
replace county = 401 if inlist(bplke, 401, 402, 403, 404)	// MARSABIT
replace county = 402 if inlist(bplke, 405, 406)	// ISIOLO
replace county = 403 if inlist(bplke, 407, 408, 409, 410, 412, 413)	// MERU
replace county = 404 if inlist(bplke, 411, 414)	// THARAKA-NITHI
replace county = 405 if inlist(bplke, 415, 416)	// EMBU
replace county = 406 if inlist(bplke, 417, 418, 419, 420)	// KITUI
replace county = 407 if inlist(bplke, 421, 422, 423, 424)	// MACHAKOS
replace county = 408 if inlist(bplke, 425, 426, 427, 428)	// MAKUENI

* North Eastern
replace county = 501 if inlist(bplke, 501, 502, 503, 504)	// Garissa
replace county = 502 if inlist(bplke, 505, 506, 507, 508)	// Wajir
replace county = 503 if inlist(bplke, 509, 510, 511)	// Mandera

* Nyanza
replace county = 601 if inlist(bplke, 601, 602, 603)	// SIAYA
replace county = 602 if inlist(bplke, 604, 605, 606)	// KISUMU
replace county = 603 if inlist(bplke, 610, 611, 612, 613)	// MIGORI
replace county = 604 if inlist(bplke, 607, 608, 609)	// HOMABAY
replace county = 605 if inlist(bplke, 614, 615, 616, 617, 618)	// KISII
replace county = 606 if inlist(bplke, 619, 620, 621)	// NYAMIRA

* Rift Valley
replace county = 701 if inlist(bplke, 701, 702, 703)	// TURKANA
replace county = 702 if inlist(bplke, 704, 705, 706)	// WEST POKOT
replace county = 703 if inlist(bplke, 707, 708, 709)	// SAMBURU
replace county = 704 if inlist(bplke, 710, 711, 712)	// TRANSNZOIA
replace county = 705 if inlist(bplke, 713, 714, 715, 716)	// BARINGO
replace county = 706 if inlist(bplke, 717, 718, 719)	// UASIN GISHU
replace county = 707 if inlist(bplke, 720, 721)	// ELEGEYO MARAKWET
replace county = 708 if inlist(bplke, 722, 723, 724, 725, 726)	// NANDI
replace county = 709 if inlist(bplke, 727, 728, 729)	// LAIKIPIA
replace county = 710 if inlist(bplke, 730, 731, 732, 733)	// NAKURU
replace county = 711 if inlist(bplke, 734, 735, 736)	// NAROK
replace county = 712 if inlist(bplke, 737, 738, 744)	// KAJIADO
replace county = 713 if inlist(bplke, 739, 740)	// KERICHO
replace county = 714 if inlist(bplke, 741, 742, 743)	// BOMET

* Western
replace county = 801 if inlist(bplke, 801, 802, 803, 804, 805, 809, 810)	// KAKAMEGA
replace county = 802 if inlist(bplke, 806, 807, 808)	// VIHIGA
replace county = 803 if inlist(bplke, 811, 812, 813, 814, 815)	// BUNGOMA
replace county = 804 if inlist(bplke, 816, 817, 818, 819, 820)	// BUSIA


tab bplke if mi(county)	// got everyone that isn't foreign born or unknown
* Drop those with unknown birthplace and those foreign born
drop if bplke >= 900

*** Get sub-regions from FinAccess Data ***
gen a2 = county

* Get county to sub-region crosswalk from FinAccess file
tempfile county_subregion_cross
preserve
	use "$dr/FinAccess_2016_data_final_anonymized.dta", clear
	rename a2 county
	keep county sub_region
	duplicates drop
	save `county_subregion_cross'
restore

* assert(3): stop with an error unless every observation on both sides matches
merge m:1 county using `county_subregion_cross', assert(3) nogen

*** Calculate attendance rate ***

* Brudevold-Newman (2016) uses those born in 1989 and 1990 as the counterfactual group
* - or those aged 18 and 19 when the program went into effect.
* Let's use those aged 20-30 for our comparison

* Define based on Kenyan education and completing standard 8 (educke == 10),
* conditional on completing primary (educke in 10-16 or 40-45)
gen no_sec_trans = (educke == 10) if inrange(educke, 10, 16) | inrange(educke, 40, 45)

keep if inrange(age, 20, 30)

collapse (mean) no_sec_trans [pw=perwt], by(sub_region)

lab var no_sec_trans "Free Secondary Education Intensity"

lab data "calc_sec_transition_rates.do"
save "$d0/fse_intensity.dta", replace  
